
#### HOUSEKEEPING ####

rm(list = ls())

library(tidyverse)
library(data.table)

## DIRECTORIES
source("00_paths.R")

## RUN LABEL
# Names the results subfolder that this run writes into.
date <- "final"

## OUTPUT DIRECTORIES
# Paths are assembled with paste0(), so this assumes `res_dir` (defined in
# 00_paths.R) already ends with a path separator.
out_dir <- paste0(res_dir, "pensynth/", date, "/")

# Create every subfolder on its own instead of gating all of them on whether
# `out_dir` exists: a single gate means that once `out_dir` has been created,
# subfolders such as nn/ and inputs/resamp/ are never added and later writes
# into them fail. recursive = TRUE also builds any missing parent folders;
# showWarnings = FALSE keeps re-runs quiet when the folders already exist.
invisible(lapply(
  paste0(out_dir, c("nn", "grid", "fixed", "inputs", "inputs/resamp")),
  dir.create,
  recursive = TRUE, showWarnings = FALSE
))

## MASKED PARAMETERS
# Both values are left as NA in this file. While they are NA, the comparisons
# that use them further down evaluate to NA and filter() keeps no rows, so
# they need to be filled in for the post-period mean and daily variance to be
# populated.
start_announce_mon <- NA # first month of announcement
end_announce_mon <- NA # last month of announcement

#### DATA ####

#### ~ LOAD PREPPED PANELS ####

# Monthly panels (rows carry a `month` column), tagged with their market
sm_c <- mutate(
  fread(file = paste0(input_dir, "submonth_control_all.csv")),
  mkt = "Control"
)
sm_t <- mutate(
  fread(file = paste0(input_dir, "submonth_treat.csv")),
  mkt = "Treated"
)

# Daily panels (rows carry a `date` column), tagged with their market
sd_c <- mutate(
  fread(file = paste0(input_dir, "subdate_control_all.csv")),
  mkt = "Control"
)
sd_t <- mutate(
  fread(file = paste0(input_dir, "subdate_treat.csv")),
  mkt = "Treated"
)

# Customer lists for the control and treated groups.
# NOTE(review): `c` and `t` shadow the base functions of the same name; calls
# such as c(...) still resolve to the function, but the names are easy to
# misread.
c <- fread(file = paste0(input_dir, "subdate_control_list_all.csv"))
t <- fread(file = paste0(input_dir, "subdate_treat_list.csv"))

#### ~ CONSTRUCT VARS ####

# Stack treated rows on top of control rows for each panel
sm <- rbind(sm_t, sm_c)
sd <- rbind(sd_t, sd_c)

# One row per customer with monthly-usage and usage-share covariates.
# mean_gb_monK averages tot_gb over ALL of the customer's rows, counting rows
# from months other than K as zero (it is not the mean within month K alone).
# NOTE(review): no month filter is applied here, so the shares use every month
# present in `sm` -- confirm that is intended for a "pre" covariate set.
# NOTE(review): a customer whose mean tot_gb is 0 gets NaN shares.
spre <- sm %>%
  group_by(customer_key) %>%
  mutate(
    mean_gb_mon1 = mean(tot_gb * (month == 1)),
    mean_gb_mon2 = mean(tot_gb * (month == 2)),
    mean_gb_mon3 = mean(tot_gb * (month == 3)),
    shr_vid      = mean(gb_video) / mean(tot_gb),
    shr_brows    = mean(gb_browsing) / mean(tot_gb),
    shr_netflix  = mean(gb_netflix) / mean(tot_gb),
    shr_youtube  = mean(gb_youtube) / mean(tot_gb),
    shr_linear   = mean(gb_linear) / mean(tot_gb)
  ) %>%
  ungroup() %>%
  dplyr::select(
    customer_key, starts_with("mean_gb_mon"),
    shr_vid, shr_brows, shr_netflix, shr_youtube, shr_linear
  ) %>%
  distinct()

# Per-customer mean of tot_gb over months after the announcement window.
# With end_announce_mon left as NA the filter keeps no rows.
spost <- sm %>%
  filter(month > end_announce_mon) %>%
  group_by(customer_key) %>%
  mutate(mean_gb_post = mean(tot_gb)) %>%
  ungroup() %>%
  distinct(customer_key, mean_gb_post)

# Per-customer variance of daily tot_gb before the announcement window.
# This replaces the daily panel in `sd` with the one-row-per-customer summary.
# `date` inside filter() is the panel's date column, not the run label above.
# With start_announce_mon left as NA the filter keeps no rows.
sd <- sd %>%
  filter(month(date) < start_announce_mon) %>%
  group_by(customer_key) %>%
  mutate(var_gb_day = var(tot_gb)) %>%
  ungroup() %>%
  distinct(customer_key, var_gb_day)

#### ~ SAMPLE ####

# Fixed seed so the control draw below is reproducible
set.seed(3)

# Number of control rows to draw
n_c <- 5000

# Treated: every row of the treated list, flagged treat = 1
t_s <- mutate(dplyr::select(t, customer_key), treat = 1)

# Controls: n_c rows drawn without replacement, flagged treat = 0
c_s <- mutate(dplyr::select(sample_n(c, n_c), customer_key), treat = 0)

# Combined sample of customer keys with their treatment flag
s <- rbind(t_s, c_s)

# Panels restricted to the sampled customers (joined on the shared columns)
sm_s <- inner_join(sm, s)
sd_s <- inner_join(sd, s)

# Customer-level attributes for the sample: stack the control and treated
# lists, set treat to 1 wherever the stacked treated rows left it missing,
# then keep only the sampled customers.
ct <- rbind(mutate(c, treat = 0), t, fill = TRUE) %>%
  mutate(treat = ifelse(is.na(treat), 1, treat)) %>%
  inner_join(s)

# Attach the constructed covariates and outcome to each sampled customer
df <- ct %>%
  left_join(spre) %>%
  left_join(spost) %>%
  left_join(sd)

#### SAVE ####

## CREATE DIRECTORIES
# Create each output subfolder unconditionally. Gating this on
# !file.exists(out_dir) skips the whole step whenever `out_dir` is already
# there (which it is after the housekeeping section has run), leaving nn/ and
# inputs/resamp/ missing and breaking the writes into inputs/resamp/ below.
# recursive = TRUE builds any missing parents; showWarnings = FALSE keeps
# re-runs quiet when a folder already exists.
invisible(lapply(
  paste0(out_dir, c("nn", "grid", "fixed", "inputs", "inputs/resamp")),
  dir.create,
  recursive = TRUE, showWarnings = FALSE
))

## DEFINE PARAMETERS
# Values written to inputs/lambda.csv and inputs/X_vars.csv below
lambda <- 0.1
X_vars <- c(
  "mean_gb_mon1", "mean_gb_mon2", "mean_gb_mon3",
  "shr_vid", "shr_brows", "shr_netflix", "shr_youtube", "shr_linear",
  "var_gb_day", "vid_flag", "svc_tier"
)

## PLAN CHOICE GROUPS
# A single group: every row gets group = 1
df$group <- 1

## SAVE
# Each helper table is written as a one-column CSV whose header is the
# variable name (lambda / X_vars).
fwrite(x = df, file = paste0(out_dir, "inputs/data.csv"))
fwrite(
  x = data.frame(lambda = lambda),
  file = paste0(out_dir, "inputs/lambda.csv")
)
fwrite(
  x = data.frame(X_vars = X_vars),
  file = paste0(out_dir, "inputs/X_vars.csv")
)

#### RESAMPLE CONTROLS ####

n_c <- 5000 # control rows drawn per resample (before de-duplication)
n_s <- 200  # number of resampled datasets to write

c   <- c %>% inner_join(c_s) # SAMPLE FROM ORIGINAL n_c CONTROLS

# fwrite() does not create missing folders, so make sure the target exists
# before the loop starts writing into it.
dir.create(
  paste0(out_dir, "inputs/resamp"),
  recursive = TRUE, showWarnings = FALSE
)

set.seed(6392)

# seq_len() rather than 1:n_s so that n_s = 0 runs zero iterations
for (is in seq_len(n_s)) {

  print(is)

  # Draw n_c control rows with replacement, then keep each drawn customer
  # once, so a resample holds at most n_c unique controls.
  # NOTE(review): confirm that de-duplicating the draw (rather than keeping
  # repeated customers) is the intended resampling scheme.
  c_s <- c %>%
    sample_n(n_c, replace = TRUE) %>%
    dplyr::select(customer_key) %>%
    mutate(treat = 0) %>%
    distinct()
  s   <- rbind(t_s, c_s)

  # The per-iteration sm_s / sd_s panel joins were dropped: nothing in this
  # file reads them, and they re-joined the full monthly panel on every pass.

  # Customer-level attributes for this resample (all treated + drawn controls)
  ct <- c %>%
    mutate(treat = 0) %>%
    rbind(t, fill = TRUE) %>%
    mutate(treat = ifelse(is.na(treat), 1, treat)) %>%
    inner_join(s)

  df <- ct %>%
    left_join(spre) %>%
    left_join(spost) %>%
    left_join(sd)

  # A single group: every row gets group = 1
  df$group <- 1

  fwrite(x = df, file = paste0(out_dir, "inputs/resamp/data_", is, ".csv"))

}

